library(tidyverse)
library(haven)
library(glue)
library(MatchIt)
library(Matching)
library(MatchingFrontier)
library(AER)
library(Amelia)
library(ebal)
library(xtable)

# Load the survey data once and define the inputs shared by the balance tables.
# NOTE(review): this absolute path is machine-specific. Later sections call
# read_dta() with a relative file name, so they rely on this working directory.
setwd("C:/Users/dapon/Dropbox/Gov2001 Replication/Original replication download/usedata")
survey_data <- read_dta("survey_data.dta")
# Copy `_mi_m` to a syntactic column name so it can be used unquoted in filter().
survey_data$mi_m <- survey_data$`_mi_m`

# Covariates whose balance across treatment groups is reported.
covars <- c("age", "edu", "logdist", "female", "householdfinance", "vote_GD")
vars <- c(covars, "treatment")
# Formula handed to MatchBalance(); its right-hand side lists the same covariates.
bal.formula <- formula(treatment ~ age + edu + logdist + female + householdfinance + vote_GD)


# BALANCE TABLE FOR UNMATCHED DATA
# CORRESPONDS TO TABLE 2 IN PAPER
# For each data set with mi_m = 1..5, compute covariate balance between the
# treatment groups, then average the statistics across those data sets.
imputations <- 1:5
balance_tabs <- vector("list", length(imputations))
for (k in seq_along(imputations)) {
  i <- imputations[k]
  impute_data <- survey_data %>% filter(mi_m == i)
  clean.data <- na.omit(impute_data[, c(covars, "treatment")])
  mb.unmatched <- MatchBalance(bal.formula, data = clean.data, print.level = 0)
  tab.unmatched <- baltest.collect(mb.unmatched,
                                   var.names = covars,
                                   after = FALSE)
  # Keep columns 1:6, which include mean.Tr, mean.Co and the T pval used below.
  balance_tabs[[k]] <- tab.unmatched[, 1:6]
}
# Bind once instead of growing the table inside the loop.
table.out <- data.frame(do.call(rbind, balance_tabs))
# Each per-imputation table has one row per covariate, in `covars` order;
# repeat the labels explicitly rather than relying on silent recycling.
table.out$var <- rep(covars, times = nrow(table.out) / length(covars))
unmatched.bal.table <- table.out %>% group_by(var) %>% summarise(
  mean.Tr = mean(mean.Tr), # Average the values across all imputations. Note that logdist does not vary
  mean.Co = mean(mean.Co),
  pval = mean(T.pval))
print(xtable(unmatched.bal.table, caption = "Balance Table for Unsubsetted Data"),
      comment = FALSE, size = "scriptsize", include.rownames = FALSE)


##############
## Balance table for subsets
#########
## TABLE 3 in paper
##############

# Reload the data so this section can be run on its own.
setwd("C:/Users/dapon/Dropbox/Gov2001 Replication/Original replication download/usedata")
survey_data <- read_dta("survey_data.dta")
survey_data$mi_m <- survey_data$`_mi_m`
# Get distance quantiles (0%, 25%, 50%, 75%, 100%) over the distinct logdist values.
quantiles <- quantile(unique(survey_data$logdist), na.rm = TRUE)
quantile_text <- c("0%", "25%", "50%", "75%", "100%")

# Indices 2, 3, 4 of `quantiles` are the 25%, 50% and 75% cut points.
quantile_idx <- c(2, 3, 4)
imputations <- 1:5
subset_tabs <- vector("list", length(quantile_idx) * length(imputations))
slot <- 0
for (q in quantile_idx) {
  for (i in imputations) {
    # Keep respondents in imputation i whose logdist is at or below the cut point.
    impute_data <- survey_data %>% filter(mi_m == i & logdist <= quantiles[q])
    clean.data <- na.omit(impute_data[, c(covars, "treatment")])
    mb.quantile <- MatchBalance(bal.formula, data = clean.data, print.level = 0)
    tab.quantile <- baltest.collect(mb.quantile,
                                    var.names = covars,
                                    after = FALSE)
    # Tag each row with the quantile index and the subset size before NA removal.
    tab.quantile <- cbind(tab.quantile, quartile = q, N = nrow(impute_data))
    slot <- slot + 1
    subset_tabs[[slot]] <- tab.quantile
  }
}
# Bind once instead of growing the table inside the nested loop.
table.out <- data.frame(do.call(rbind, subset_tabs))
# Each collected table has one row per covariate, in `covars` order;
# repeat the labels explicitly rather than relying on silent recycling.
table.out$var <- rep(covars, times = nrow(table.out) / length(covars))
subset.bal.table <- table.out %>% group_by(var, quartile, N) %>% summarise(
  mean.Tr = mean(as.numeric(mean.Tr)), # Average the values across all imputations. Note that logdist does not vary
  mean.Co = mean(as.numeric(mean.Co)),
  pval = mean(as.numeric(T.pval))
)
print(xtable(subset.bal.table, caption = "Balance Table for Geographically Subsetted Data"),
      comment = FALSE, size = "scriptsize", include.rownames = FALSE)


##############
## Balance table for manual cutoffs matching
#########
# Table 4 in paper
########
survey_data <- read_dta("survey_data.dta")
survey_data$mi_m <- survey_data$`_mi_m`
bal.formula <- formula(treatment ~ age + edu + logdist + female + householdfinance + vote_GD)
# Loop-invariant definitions, set once instead of on every iteration.
covars <- c("age", "edu", "logdist", "female", "householdfinance", "vote_GD")
vars <- c(covars, "treatment")

# NOTE(review): 2046 is hard-coded; presumably the number of rows in
# clean.data for each imputation -- confirm against nrow(clean.data).
n_total <- 2046
# Numbers of observations to prune at each cutoff.
prune_counts <- c(450, 900, 1050, 1200)
imputations <- 1:5
pruned_tabs <- vector("list", length(imputations) * length(prune_counts))
slot <- 0

for (i in imputations) {
  impute_data <- survey_data %>% filter(mi_m == i)
  clean.data <- na.omit(impute_data[, c(covars, "treatment", "asylumspec_burden")])
  frontier <- makeFrontier(dataset = as.data.frame(clean.data),
                           treatment = "treatment",
                           outcome = "asylumspec_burden",
                           match.on = covars)
  for (p in prune_counts) { # Compute balance at several cutoffs
    # Request the frontier data set that retains n_total - p observations.
    frontier.dataset <- generateDataset(frontier.object = frontier, N = n_total - p)
    mb.pruned <- MatchBalance(bal.formula, data = frontier.dataset, print.level = 0)
    tab.pruned <- baltest.collect(mb.pruned,
                                  var.names = covars,
                                  after = FALSE)
    # Append the prune count as a column named "p" for grouping below.
    tab.pruned <- cbind(tab.pruned, p)
    slot <- slot + 1
    pruned_tabs[[slot]] <- tab.pruned
  }
}

# Bind once instead of growing the table inside the nested loop.
table.out <- data.frame(do.call(rbind, pruned_tabs))
# Each collected table has one row per covariate, in `covars` order;
# repeat the labels explicitly rather than relying on silent recycling.
table.out$var <- rep(covars, times = nrow(table.out) / length(covars))
# Average each statistic across imputations within covariate and prune count.
pruned.bal.table <- table.out %>% group_by(var, p) %>% summarise(
  mean.Tr = mean(mean.Tr),
  mean.Co = mean(mean.Co),
  pval = mean(T.pval)
)
print(xtable(pruned.bal.table, caption = "Balance Table for Matched Data"),
      comment = FALSE, size = "scriptsize", include.rownames = FALSE)


################################################################################
##############
## Balance table for matching (not used in paper)
#########
# Read the file once; the loop below only filters it per imputation.
full_data <- read_dta("survey_data.dta")
full_data$mi_m <- full_data$`_mi_m`
covars <- c("age", "edu", "logdist", "female", "householdfinance", "vote_GD")
vars <- c(covars, "treatment")
bal.formula <- formula(treatment ~ age + edu + logdist + female + householdfinance + vote_GD)

imputations <- 1:5
matched_tabs <- vector("list", length(imputations))
for (k in seq_along(imputations)) {
  i <- imputations[k]
  survey_data <- full_data %>% filter(mi_m == i)
  clean.data <- na.omit(survey_data[, c(covars, "treatment", "asylumspec_burden")])
  # Match on the covariates; Weight = 2 is labelled Mahalanobis in the caption below.
  m.first <- Match(Y = clean.data$asylumspec_burden, Tr = clean.data$treatment,
                   X = clean.data[, covars], Weight = 2)
  mb.matched <- MatchBalance(bal.formula, data = clean.data,
                             match.out = m.first, print.level = 0)
  tab.matched <- baltest.collect(mb.matched,
                                 var.names = covars)
  matched_tabs[[k]] <- tab.matched[, 1:6]
}
# Stacked per-imputation tables, kept under the original name.
table.out <- do.call(rbind, matched_tabs)

# Element-wise average of the per-imputation tables. Row r of every table is
# the same covariate, so this equals averaging rows r, r + 6, r + 12, ... of
# the stacked table, without hard-coding the row indices.
balance.table <- Reduce(`+`, matched_tabs) / length(matched_tabs)
balance.table <- as.data.frame(balance.table)
rownames(balance.table) <- covars
print(xtable(balance.table, caption = "Balance for Mahalanobis Matched Data"),
      comment = FALSE, size = "scriptsize")



